home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- """ SPARQL Lexer, Parser and Function-Mapper
- By Shawn Brown <http://shawnbrown.com/contact>
-
- TO DO:
- swap current parser functions for Michelp's pyparsing setup
- add mapping for FILTER/constraints
- typed literals
- integer, double or boolean abbreviations
- language tags (e.g., @fr)
- nested OPTIONALs ???
- blank node and RDF collection syntax ???
- GRAPH statements ???
-
- CURRENTLY SUPPORTED:
- Simple SELECT queries
- Predicate-object and object list shorthand
- (e.g., ?x foaf:name ?name ; foaf:mbox ?mbox ; vcard:TITLE ?title)
- Multi-line/triple-quoted literals
- BASE, PREFIX, SELECT, WHERE, UNION, OPTIONAL, multiple UNIONs and multiple
- OPTIONALs (but not nested OPTIONALs)
-
- USAGE:
- #from sparql_lpm import doSPARQL
- from rdflib.sparql.parser import doSPARQL
- ...load graph...
- ...define SPARQL query as string...
- result = doSPARQL(queryStr, sparqlGr)
-
- """
- import base64
- import re
- from rdflib.URIRef import URIRef
- from rdflib.sparql.graphPattern import GraphPattern
-
def _escape(text):
    ''' Return `text` base64-encoded on a single line (no newlines).

    The result uses only the base64 alphabet (A-Z, a-z, 0-9, "+", "/", "="),
    so an escaped literal or IRI contains no spaces, quotes, braces or
    punctuation that the regex passes in this module match on.

    text -- byte string or unicode text; text is encoded as UTF-8 first, so
            non-ASCII characters no longer raise.

    Returns a native `str` on both Python 2 and Python 3.
    '''
    # base64 works on bytes; on Python 2 `bytes` is `str`, so plain strings
    # pass through unchanged and only unicode objects get encoded here.
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    # b64encode never inserts line breaks, unlike the removed encodestring().
    encoded = base64.b64encode(text)
    if not isinstance(encoded, str):
        # Python 3: b64encode returns bytes; the alphabet is pure ASCII.
        encoded = encoded.decode('ascii')
    return encoded
-
-
def _unescape(text):
    ''' Reverse the base64 escaping applied to literals and IRIs.

    text -- base64 string (ASCII).

    Returns the decoded value as a native `str`: the raw byte string on
    Python 2, UTF-8 decoded text on Python 3.

    Raises binascii.Error (TypeError on Python 2) when `text` is not valid
    base64.
    '''
    decoded = base64.b64decode(text)
    if not isinstance(decoded, str):
        # Python 3: b64decode returns bytes; turn them back into text.
        decoded = decoded.decode('utf-8')
    return decoded
-
-
def _escapeLiterals(query):
    ''' Replace the body of every quoted literal with its _escape()d form.

    A literal is a run delimited by triple or single quotes (" or ') that is
    followed by optional whitespace and one of ". , ; }".  The match is
    rewritten as the escaped body wrapped in single quotes, followed by the
    original trailing whitespace/terminator.  Quoted text not followed by
    one of those terminators is left untouched.
    '''
    quoted = '(\\"\\"\\"|\'\'\'|[\\"\'])([^\\1]*?[^\\\\]?)\\1'
    terminator = '(\\s*[.,;\\}])'

    def encode(match):
        # group 2 is the literal body, group 3 the whitespace + terminator
        return "'" + _escape(match.group(2)) + "'" + match.group(3)

    return re.sub(quoted + terminator, encode, query)
-
-
def _resolveShorthand(query):
    ''' Expand ";" and "," list shorthand into full, "."-separated triples
    (SPARQL "2.8 Other Syntactic Forms").

    Expects whitespace to have been collapsed to single spaces already; the
    patterns separate terms on a literal space character.
    '''

    def expand(pattern, text):
        # One pass rewrites the first shorthand item found in each group;
        # repeat until a pass makes no replacement at all.
        compiled = re.compile(pattern)
        replaced = 1
        while replaced:
            text, replaced = compiled.subn('\\1\\2\\3 . \\2\\4', text)
        return text

    # "?s p1 o1 ; p2 o2"  ->  "?s p1 o1 . ?s p2 o2"
    predicateObjectList = '(\\{.*?)([^ ]+ )([^ ]+ [^ ]+)\\s?; ([^ ]+ [^ ]+\\s?[,;\\.\\}])'
    # "?s p o1 , o2"  ->  "?s p o1 . ?s p o2"
    objectList = '(\\{.*?)([^ ]+ [^ ]+ )([^ ]+\\s?), ([^ ]+\\s?[,\\.\\}])'

    query = expand(predicateObjectList, query)
    return expand(objectList, query)
-
-
def _resolvePrefixes(query):
    ''' Resolve prefixed IRIs and remove the PREFIX statements.

    Every IRI in the returned query, whether written as <...> or as
    prefix:local, appears as "<" + _escape(full IRI) + ">".

    Besides the prefixes declared in the query, rdf:, rdfs:, xsd: and fn:
    are always available; the empty prefix (":") is resolved last.
    '''
    # (prefix-with-colon, namespace IRI) pairs declared in the query
    prefixes = re.findall('PREFIX ([\\w\\d]+:) <([^<>]+)>', query)
    prefixes.extend([
        ('rdf:', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
        ('rdfs:', 'http://www.w3.org/2000/01/rdf-schema#'),
        ('xsd:', 'http://www.w3.org/2001/XMLSchema#'),
        ('fn:', 'http://www.w3.org/2004/07/xpath-functions')])
    # The colon-only prefix is not caught by the findall above.
    default = re.search('PREFIX : <([^<>]+)>', query)
    if default is not None:
        prefixes.append((':', default.group(1)))

    # Drop the PREFIX statements themselves.
    query = re.sub('PREFIX [\\w\\d]*:[ ]?<[^<>]+>[ ]?', '', query)

    # Escape the IRIs that are already written out in full.
    query = re.sub('<([^<>]+)>',
                   lambda m: '<' + _escape(m.group(1)) + '>',
                   query)

    # Expand prefix:local into an escaped full IRI.  The loop variable must
    # be the (prefix, namespace) pair; the namespace is bound as a default
    # argument so each callback keeps its own value.
    for prefix, namespace in prefixes:
        def expand(m, namespace=namespace):
            return '<' + _escape(namespace + m.group(1)) + '>'
        query = re.sub(prefix + '([^ .\\}]+)', expand, query)

    return query
-
-
def _resolveBase(query):
    ''' Resolve relative IRIs against the BASE IRI and drop the BASE
    statement.  A query without a BASE statement is returned unchanged.

    An IRI counts as relative when the text between "<" and ">" contains
    no colon and no space; the base IRI is simply prepended to it.
    '''
    baseStatement = re.compile('BASE <([^<>]+)>\\s?')
    found = baseStatement.search(query)
    if found is None:
        return query

    baseIRI = found.group(1)

    def absolutize(match):
        return '<' + baseIRI + match.group(1) + '>'

    resolved = re.sub('<([^<>: ]+)>', absolutize, query)
    return baseStatement.sub('', resolved)
-
-
def _parseSelect(query):
    ''' Return the SELECTed variable names (with their "?" or "$" sigil) as
    a tuple, or None when the query has no "SELECT ?var ..." clause.

    Variables must follow SELECT separated by single spaces.
    '''
    variable = '[?$][\\w\\d]+'
    clause = re.search('SELECT(?: ' + variable + ')+', query)
    if clause is None:
        return None
    return tuple(re.findall(variable, clause.group(0)))
-
-
class _StackManager:
    ''' Cursor over a token sequence, used by _parser().

    token() returns the current token and next() advances to the following
    non-empty token.  Once the tokens are exhausted token() returns None.
    '''

    def __tokenGen(self, tokens):
        ''' Yield the tokens one by one. '''
        for token in tokens:
            yield token

    def __init__(self, tokenList):
        ''' Position the cursor on the first non-empty token of `tokenList`.

        An empty list (or one holding only empty strings) leaves the
        cursor at None instead of raising StopIteration.
        '''
        self.stack = self.__tokenGen(tokenList)
        self.current = None
        self.next()

    def next(self):
        ''' Advance to the next non-empty token; None when exhausted. '''
        self.current = None
        # Resuming the for-loop over the generator continues where the last
        # call stopped; this avoids the Python-2-only generator .next() and
        # the recursion the empty-token skip previously relied on.
        for token in self.stack:
            if token != '':
                self.current = token
                break

    def token(self):
        ''' Return the current token (None after the last one). '''
        return self.current
-
-
-
class Where(list):
    ''' List of statements / nested groups found in a WHERE group. '''


class Union(list):
    ''' List of statements / nested groups found in a UNION group. '''


class Optional(list):
    ''' List of statements / nested groups found in an OPTIONAL group. '''


# Keyword -> zero-argument factory producing a fresh, empty typed list.
_listTypes = {
    'WHERE': Where,
    'UNION': Union,
    'OPTIONAL': Optional,
}


def _makeList(keyword):
    ''' Return a new empty list of the type named by `keyword`, or None
    when `keyword` is not one of WHERE, UNION or OPTIONAL. '''
    factory = _listTypes.get(keyword)
    if factory is None:
        return None
    return factory()
-
-
def _parser(stack, listType = 'WHERE'):
    ''' Simple recursive descent parser over the tokens of a group.

    stack    -- object with token()/next() (see _StackManager), positioned
                on the first token inside the group.
    listType -- keyword naming the list type to build for this group.

    Returns a Where/Union/Optional list whose items are statement strings
    (tokens joined by single spaces) and nested typed lists.  On return the
    stack is positioned on the "}" that closed the group, or is exhausted.
    '''
    typedList = _makeList(listType)
    nestedType = listType  # type used for the next "{ ... }" encountered
    while stack.token() is not None:
        token = stack.token()
        # "is not None" matters here: a fresh typed list is empty and
        # therefore falsy, so a plain truth test would never match.
        if _makeList(token) is not None:
            # Keyword (e.g. OPTIONAL): it types the group that follows.
            nestedType = token
        elif token == '{':
            stack.next()
            typedList.append(_parser(stack, nestedType))
            nestedType = listType  # the keyword applies to one group only
        elif token == '}':
            return typedList
        elif token != '.':
            # Gather tokens up to the next ".", "{" or "}" into a statement.
            words = []
            while token is not None and token not in ('.', '{', '}'):
                words.append(token)
                stack.next()
                token = stack.token()
            typedList.append(' '.join(words).strip())
            # The stack already sits on the terminator; let the outer loop
            # inspect it instead of stepping over it.
            continue

        stack.next()
    return typedList
-
-
def _parseWhere(query):
    ''' Parse the tokens of a WHERE body into a nested typed list.

    query -- sequence of tokens (the caller has already split the text on
             spaces), not the raw query string.
    '''
    return _parser(_StackManager(query))
-
-
def _findStatements(stmntType, stmntList):
    ''' Flatten `stmntList` into a list of statement strings.

    Plain strings are collected in order; nested lists are descended into
    only when their exact type is the one named by `stmntType`.  Nested
    lists of any other type are skipped.  Used by _getStatements().
    '''
    wanted = type(_makeList(stmntType))
    found = []
    for entry in stmntList:
        kind = type(entry)
        if kind is str:
            found.append(entry)
        if kind == wanted:
            found.extend(_findStatements(stmntType, entry))
    return found
-
-
def _getStatements(stmntType, stmntList):
    ''' Return one flat statement list per item of `stmntList` whose exact
    type is the list type named by `stmntType`; other items are ignored. '''
    wanted = type(_makeList(stmntType))
    return [_findStatements(stmntType, group)
            for group in stmntList
            if type(group) == wanted]
-
-
def _buildGraphPattern(triples):
    ''' Build a GraphPattern from a list of statement strings.

    triples -- statement strings of the form "subject predicate object",
               terms separated by single spaces.

    Terms written as <...> are unescaped and wrapped in URIRef.  An object
    enclosed in matching single or double quotes is unescaped to a plain
    string.  Any other term is passed through unchanged.

    Raises IndexError when a statement has fewer than three terms.
    '''

    def isIRI(term):
        return term[0] == '<' and term[-1] == '>'

    def isLit(term):
        return ((term[0] == "'" and term[-1] == "'")
                or (term[0] == '"' and term[-1] == '"'))

    resolved = []
    for statement in triples:
        terms = statement.split(' ')
        sub, pred, obj = terms[0], terms[1], terms[2]

        # [1:-1] strips the surrounding <...> or quote characters.
        if isIRI(sub):
            sub = URIRef(_unescape(sub[1:-1]))

        if isIRI(pred):
            pred = URIRef(_unescape(pred[1:-1]))

        if isIRI(obj):
            obj = URIRef(_unescape(obj[1:-1]))
        elif isLit(obj):
            obj = _unescape(obj[1:-1])

        resolved.append((sub, pred, obj))

    return GraphPattern(resolved)
-
-
def _buildQueryArgs(query):
    ''' Turn a SPARQL query string into the arguments for sparqlGr.query().

    Returns a dict with the keys
      'select'   -- tuple of selected variables, or None
      'where'    -- list of graph patterns: the WHERE group first, then one
                    per UNION group
      'optional' -- list of graph patterns, one per OPTIONAL group
    '''
    # Literals are escaped first so later regex passes cannot match inside.
    query = _escapeLiterals(query)
    query = re.sub('\\s+', ' ', query).strip()
    for rewrite in (_resolveShorthand, _resolveBase, _resolvePrefixes):
        query = rewrite(query)
    # Put exactly one space around each punctuation token for the split.
    query = re.sub('\\s*([.;,\\{\\}])\\s*', ' \\1 ', query)

    select = _parseSelect(query)

    # Everything between the first "{" and the last "}" is the WHERE body.
    body = query[query.find('{') + 1:query.rfind('}')].strip()
    whereObj = _parseWhere(body.split(' '))

    whereGroups = _getStatements('WHERE', [whereObj])
    whereGroups.extend(_getStatements('UNION', whereObj))
    where = [_buildGraphPattern(group) for group in whereGroups]

    optionalGroups = _getStatements('OPTIONAL', whereObj)
    optional = [_buildGraphPattern(group) for group in optionalGroups]

    return {
        'select': select,
        'where': where,
        'optional': optional }
-
-
def doSPARQL(query, sparqlGr):
    ''' Run the SPARQL query string `query` against `sparqlGr`.

    The query is translated by _buildQueryArgs() and handed to
    sparqlGr.query(select, where, optional); that call's result is
    returned unchanged.
    '''
    args = _buildQueryArgs(query)
    select, where, optional = args['select'], args['where'], args['optional']
    return sparqlGr.query(select, where, optional)
-
if __name__ == '__main__':
    # Demo: print, for each sample query, the arguments that doSPARQL()
    # would pass to sparqlGr.query().  No graph is loaded or queried.
    testCases = [
        '\nSELECT ?name\nWHERE { ?a <http://xmlns.com/foaf/0.1/name> ?name }\n',
        '\nPREFIX foaf: <http://xmlns.com/foaf/0.1/>\nSELECT ?name\nWHERE { ?a foaf:name ?name }\n',
        '\nBASE <http://xmlns.com/foaf/0.1/>\nSELECT ?name\nWHERE { ?a <name> ?name }\n',
        '\nPREFIX : <http://xmlns.com/foaf/0.1/>\nPREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#>\nSELECT ?name ?title\nWHERE {\n ?a :name ?name .\n ?a vcard:TITLE ?title\n}\n',
        '\nPREFIX foaf: <http://xmlns.com/foaf/0.1/>\nSELECT ?name ?mbox\nWHERE {\n ?x foaf:name ?name ;\n foaf:mbox ?mbox .\n}\n',
        '\nPREFIX foaf: <http://xmlns.com/foaf/0.1/>\nSELECT ?x\nWHERE {\n ?x foaf:nick "Alice" ,\n "Alice_" .\n}\n',
        '\nPREFIX tag: <http://xmlns.com/foaf/0.1/>\nPREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#>\nSELECT ?name\nWHERE {\n ?a tag:name ?name ;\n vcard:TITLE "escape test vcard:TITLE " ;\n <tag://test/escaping> "This is a \'\'\' Test """" ;\n <tag://test/escaping> ?d\n}\n',
        '\nPREFIX foaf: <http://xmlns.com/foaf/0.1/>\nSELECT ?PREFIX ?WHERE\nWHERE {\n ?x foaf:name ?PREFIX ;\n foaf:mbox ?WHERE .\n}\n',
        '\nPREFIX WHERE: <http://xmlns.com/foaf/0.1/>\nSELECT ?name ?mbox\nWHERE {\n ?x WHERE:name ?name ;\n WHERE:mbox ?mbox .\n}\n',
        'SELECT ?title WHERE { <http://example.org/book/book1> <http://purl.org/dc/elements/1.1/title> ?title . }',
        'PREFIX foaf: <http://xmlns.com/foaf/0.1/>\nSELECT ?name ?mbox\nWHERE { ?person foaf:name ?name .\nOPTIONAL { ?person foaf:mbox ?mbox}\n}',
        'PREFIX foaf: <http://xmlns.com/foaf/0.1/>\nSELECT ?name ?name2\nWHERE { ?person foaf:name ?name .\nOPTIONAL { ?person foaf:knows ?p2 . ?p2 foaf:name ?name2 . }\n}',
        'PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n#PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nSELECT ?name ?mbox\nWHERE\n{\n{ ?person rdf:type foaf:Person } .\nOPTIONAL { ?person foaf:name ?name } .\nOPTIONAL {?person foaf:mbox ?mbox} .\n}']

    # Every print() below takes exactly one argument, so the output is the
    # same whether it is read as a Python 2 print statement or as the
    # Python 3 function.
    print('Content-type: text/plain\n\n')
    for query in testCases:
        print('\n-----\n')
        print('>>> query = """' + query.replace('\n', '\n... ') + '"""')
        print('>>> result = doSPARQL(query, sparqlGr)\n')
        result = _buildQueryArgs(query)
        for key in ('select', 'where', 'optional'):
            # Mirrors "print 'select = ', value, '\n'": the print statement
            # puts a space between items, hence the double space and the
            # space before the newline.
            print('%s =  %s \n' % (key, result[key]))
        print('result = sparqlGr.query(select, where, optional)')
-
-
-